# Bioconductor packages used by the per-gene loop further down:
# GenomicRanges (and IRanges, which it attaches) for GRanges()/IRanges()/mid(),
# Rsamtools for ScanBamParam()/scanBam(). Loading them here makes the script
# runnable from a fresh session.
library(GenomicRanges)
library(Rsamtools)

# Gene annotation table. Only genes longer than 700 bp are kept; the loop
# further down examines a 700 bp window starting at each gene's TSS.
all_genes <- read.csv("~/feature_file/gene_master.csv")
gene_700 <- subset(all_genes, length > 700)

# Sample names. Each one is both the stem of a BAM file name and the name of
# a score column in the output table.
file_name <- c("wt_pulse", "wt_c10", "wt_c15", "wt_c20", "wt_c40",
               "cac_pulse", "cac_c10", "cac_c15", "cac_c20", "cac_c40")

# Define the nucleosomal read boundaries: fragment lengths (bp), inclusive.
length_min <- 140
length_max <- 180

# Reformat the data frame: keep annotation columns 1-4, 6 and 7, then append
# one NA-filled score column per sample (columns 7-16), named after the sample.
acf.df <- gene_700[, c(1, 2, 3, 4, 6, 7)]
acf.df[file_name] <- NA

# Calculate an autocorrelation (ACF) phasing score for each gene and sample.
#
# For every gene in `gene_700`, the 700 bp window starting at the TSS and
# running in the direction given by `strand` is queried in each sample's BAM
# file. Fragments whose length (`isize`) lies within
# [length_min, length_max] are reduced to their midpoints; the midpoint
# density (bw = 30) is spline-smoothed, evaluated at every position of the
# window and autocorrelated. The score written to `acf.df[g, 6 + f]` is the
# autocorrelation at element 172 of the ACF vector, or, when fewer than 172
# lags are available, the maximum over the second half of the lags.
# Gene/sample pairs without any usable fragment keep their initial NA.

# BAM and index paths do not depend on the gene, so build them once.
bam_paths <- paste0("~/data/bam_bai/", file_name, ".bam")
bai_paths <- paste0("~/data/bam_bai/", file_name, ".bam.bai")

for (g in seq_len(nrow(gene_700))) {
  chr <- gene_700$chr[g]
  tss <- gene_700$tss[g]

  # Window of 700 bp: TSS .. TSS + 700 for '+' genes, TSS - 700 .. TSS
  # for every other strand value.
  if (gene_700$strand[g] == "+") {
    new_start <- tss
    new_end <- tss + 700
  } else {
    new_start <- tss - 700
    new_end <- tss
  }

  # Genomic window used to restrict which reads are pulled from the BAM file.
  chr.gr <- GRanges(seqnames = chr,
                    ranges = IRanges(start = new_start, end = new_end))
  p <- ScanBamParam(what = c("rname", "strand", "pos", "isize"),
                    which = chr.gr)

  for (f in seq_along(file_name)) {
    # scanBam() returns one list element per queried range; only one range
    # is queried here.
    reads <- scanBam(file = bam_paths[f],
                     index = bai_paths[f],
                     param = p)[[1]]

    # Select nucleosome-sized fragments BEFORE building ranges: isize can be
    # negative, zero or NA, and IRanges() refuses negative/NA widths, which
    # would otherwise abort the whole run.
    keep <- !is.na(reads$pos) & !is.na(reads$isize) &
      reads$isize >= length_min & reads$isize <= length_max

    if (!any(keep)) {
      next
    }

    # Midpoint of each retained fragment.
    mids <- mid(IRanges(start = reads$pos[keep], width = reads$isize[keep]))

    # Smoothed midpoint density evaluated at every bp of the window.
    smoothed <- predict(smooth.spline(density(mids, bw = 30)),
                        new_start:new_end)$y

    # Autocorrelation up to the span covered by the midpoints.
    a <- acf(smoothed, lag.max = max(mids) - min(mids), plot = FALSE)$acf
    n_lags <- length(a)

    # Correlation with a pre-determined nucleosome phasing of 172 bp.
    # NOTE(review): acf() stores lag 0 first, so a[172] is lag 171; confirm
    # whether a[173] (lag 172) was intended.
    if (n_lags < 172) {
      # Too few lags: fall back to the maximum over the second half. Integer
      # indices are used so that an odd number of lags does not silently drop
      # the last lag, and a single lag does not yield -Inf.
      tail_idx <- seq.int(max(1L, n_lags %/% 2L), n_lags)
      acf.df[g, 6 + f] <- round(max(a[tail_idx]), 3)
    } else {
      acf.df[g, 6 + f] <- round(a[172], 3)
    }
  } # end file for

  # Progress report every 100 genes.
  if (g %% 100 == 0) {
    cat(paste("Gene # ", g, "\n"))
  }
} # end gene for

# Save the score table (annotation columns plus one score column per sample)
# as CSV, without row names.
write.csv(x = acf.df, file = "~/data/acf_score.csv", row.names = FALSE)



